## Authors: Alexander Herzog and Kenneth Benoit
## Date: May 30, 2015
## Replication file for JOP article "The Most Unkindest Cuts: Speaker Selection and Expressed Government Dissent During Economic Crisis"

rm(list = ls(all = TRUE))

library(rjags)
library(ggplot2)

### PATHS ########################################
dataAll <- "./generated_data/working_data_all.RData"
posteriorData <- "./estimated_models/selection_model3_posterior.RData"
##################################################

load(dataAll)
data <- dataAll
load(posteriorData)


# factors in original data
data$crisis <- factor(data$crisis, levels=c("0", "1"), labels=c("Pre-crisis", "Crisis"))

# convert coda-object to matrix object
mcmcChain <- as.matrix(spoke3.posterior)

# extract sampled values
a <- rowMeans(mcmcChain[, grepl("a\\[", colnames(mcmcChain))])
b <- mcmcChain[,grepl("b\\[", colnames(mcmcChain))]
sigma.a <- mcmcChain[,"sigma.a"]


# label coefficients
colnames(b) <- c("unemployment",
                 "safetyScaled",
                 "seniorityYearsScaled",
                 "party.leader",
                 "government",
                 "cabMember",
                 "log.party.size",
                 "log.debate.days",
                 "crisis",
                 "crisis*unemployment",
                 "crisis*safetyScaled",
                 "crisis*government",
                 "government*unemployment",
                 "government*safetyScaled",
                 "crisis*government*unemployment",
                 "crisis*government*safetyScaled")



# Predicted probabilities for changes in unemployment rate
# --------------------------------------------------------
# create matrix of x values for prediction
startVal <- min(data$unemployment)
endVal <- max(data$unemployment)

N <- 100
pred <- with(data, expand.grid(
         unemployment = seq(startVal, endVal, (endVal-startVal)/(N-1)),
         safetyScaled = 0,
         seniorityYearsScaled = 0,
         party.leader = 0,
         government = c(0, 1),
         cabMember = 0,
         log.party.size = mean(data$log.party.size),
         log.debate.days = mean(data$log.debate.days),
         crisis = c(0, 1))
)


# define matrix for posterior predicted y values at each x value
sample.size <- nrow(b)
yPred <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )


# generate posterior predicted y values for each x value
for (i in 1:sample.size) {
    yPred[,i] <- pnorm(a[i] +
             b[i, "unemployment"] * pred$unemployment +
             b[i, "safetyScaled"] * pred$safetyScaled +
             b[i, "seniorityYearsScaled"] * pred$seniorityYearsScaled +
             b[i, "party.leader"] * pred$party.leader +
             b[i, "government"] * pred$government +
             b[i, "cabMember"] * pred$cabMember +
             b[i, "log.party.size"] * pred$log.party.size +
             b[i, "log.debate.days"] * pred$log.debate.days +
             b[i, "crisis"] * pred$crisis +
             b[i, "crisis*unemployment"] * pred$crisis * pred$unemployment +
             b[i, "crisis*safetyScaled"] * pred$crisis * pred$safetyScaled +
             b[i, "crisis*government"] * pred$crisis * pred$government +
             b[i, "government*unemployment"] * pred$government * pred$unemployment +
             b[i, "government*safetyScaled"] * pred$government * pred$safetyScaled +
             b[i, "crisis*government*unemployment"] * pred$crisis * pred$government * pred$unemployment +
             b[i, "crisis*government*safetyScaled"] * pred$crisis * pred$government * pred$safetyScaled
             )
}

pred$prob <- rowMeans(yPred)
pred$ci.lower <- apply(yPred, 1, function(x) quantile(x, prob=c(0.025)))
pred$ci.upper <- apply(yPred, 1, function(x) quantile(x, prob=c(0.975)))


# plot predicted values with ci
# -----------------------------
pred$government <- factor(pred$government, levels=c("1", "0"), labels=c("Government", "Opposition"))

pred$crisis <- factor(pred$crisis, levels=c("0", "1"), labels=c("Pre-crisis", "Crisis"))

xLabMin <- round(unique(data$lr_prop[data$unemployment==startVal]), 1)
xLabMax <- round(unique(data$lr_prop[data$unemployment==endVal]), 1)

numb.breaks <- 10
breaks.scaled.x <- round(seq(startVal, endVal, (endVal-startVal)/(numb.breaks-1)), 2)
breaks.labels <- paste0(round(seq(xLabMin, xLabMax, (xLabMax-xLabMin)/(numb.breaks-1)), 2)*100, "%")

p <- ggplot(pred, aes(x=unemployment, y=prob, group=government)) +
    # theme
    theme_bw() +
    # scales
    scale_x_continuous(breaks=breaks.scaled.x, labels=breaks.labels) +
    scale_y_continuous(breaks=seq(0,1,0.25), limits=c(0,1)) +
    # facet
    facet_wrap(~ crisis) +
    # lines and ci
    geom_line(aes(lty = government), size=1) +
    geom_ribbon(aes(ymin = ci.lower, ymax = ci.upper), alpha = 0.2) +
    labs(linetype="") +
    # distribution of actual data    
    geom_rug(data=data, aes(x=unemployment, y=spoke), side="b") +
    # title
    labs(title="Effect of constituency unemployment on probability of speaking") +
    # axis labels
    xlab("Constituency unemployment") +
    ylab("Predicted probability") +
    # theme values (text size, margins, position of legends, etc.)
    theme(
        legend.position="bottom",
        title = element_text(size=14, vjust=1.5),
        axis.title.x = element_text(vjust=-0.5),
        axis.title.y = element_text(size=12, vjust=0.4),
        axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=12),
        strip.text.x = element_text(size=12),
        legend.text = element_text(size=12)
        )

pdf("./plots/predicted_probabilities_speaking_unemployment.pdf", 12, 6)
print(p)
dev.off()



# Predicted probabilities for changes in electoral safety
# -------------------------------------------------------
# create matrix of x values for prediction
startVal <- min(data$safetyScaled)
endVal <- max(data$safetyScaled)

N <- 100
pred <- with(data, expand.grid(
         unemployment = 0,
         safetyScaled = seq(startVal, endVal, (endVal-startVal)/(N-1)),
         seniorityYearsScaled = 0,
         party.leader = 0,
         government = c(0, 1),
         cabMember = 0,
         log.party.size = 0,
         log.debate.days = mean(data$log.debate.days),
         crisis = c(0, 1))
)


# define matrix for posterior predicted y values at each x value
sample.size <- nrow(b)
yPred <- matrix( 0 , nrow=nrow(pred) , ncol=sample.size )

# generate posterior predicted y values for each x value
for (i in 1:sample.size) {
    yPred[,i] <- pnorm(a[i] +
             b[i, "unemployment"] * pred$unemployment +
             b[i, "safetyScaled"] * pred$safetyScaled +
             b[i, "seniorityYearsScaled"] * pred$seniorityYearsScaled +
             b[i, "party.leader"] * pred$party.leader +
             b[i, "government"] * pred$government +
             b[i, "cabMember"] * pred$cabMember +
             b[i, "log.party.size"] * pred$log.party.size +
             b[i, "log.debate.days"] * pred$log.debate.days +
             b[i, "crisis"] * pred$crisis +
             b[i, "crisis*unemployment"] * pred$crisis * pred$unemployment +
             b[i, "crisis*safetyScaled"] * pred$crisis * pred$safetyScaled +
             b[i, "crisis*government"] * pred$crisis * pred$government +
             b[i, "government*unemployment"] * pred$government * pred$unemployment +
             b[i, "government*safetyScaled"] * pred$government * pred$safetyScaled +
             b[i, "crisis*government*unemployment"] * pred$crisis * pred$government * pred$unemployment +
             b[i, "crisis*government*safetyScaled"] * pred$crisis * pred$government * pred$safetyScaled
             )
}

pred$prob <- rowMeans(yPred)
pred$ci.lower <- apply(yPred, 1, function(x) quantile(x, prob=c(0.025)))
pred$ci.upper <- apply(yPred, 1, function(x) quantile(x, prob=c(0.975)))


# plot predicted values with ci
# -----------------------------
pred$government <- factor(pred$government, levels=c("1", "0"), labels=c("Government", "Opposition"))

pred$crisis <- factor(pred$crisis, levels=c("0", "1"), labels=c("Pre-crisis", "Crisis"))

xLabMin <- round(unique(data$safety[data$safetyScaled==startVal]), 1)
xLabMax <- round(unique(data$safety[data$safetyScaled==endVal]), 1)

numb.breaks <- 12
breaks.scaled.x <- round(seq(startVal, endVal, (endVal-startVal)/(numb.breaks-1)), 2)
breaks.labels <- round(seq(xLabMin, xLabMax, (xLabMax-xLabMin)/(numb.breaks-1)), 2)


p <- ggplot(pred, aes(x=safetyScaled, y=prob, group=government)) +
    # theme
    theme_bw() +
    # scales
    scale_x_continuous(breaks=breaks.scaled.x, labels=breaks.labels) +
    scale_y_continuous(breaks=seq(0,1,0.25), limits=c(0,1)) +
    # facet
    facet_wrap(~ crisis) +
    # lines and ci
    geom_line(aes(lty = government), size=1) +
    geom_ribbon(aes(ymin = ci.lower, ymax = ci.upper), alpha = 0.2) +
    labs(linetype="") +
    # distribution of actual data    
    geom_rug(data=data, aes(x=safetyScaled, y=spoke), side="b") +
    # title
    labs(title="Effect of electoral safety on probability of speaking") +
    # axis labels
    xlab("Electoral safety") +
    ylab("Predicted probability") +
    # theme values (text size, margins, position of legends, etc.)
    theme(
        legend.position="bottom",
        title = element_text(size=14, vjust=1.5),
        axis.title.x = element_text(vjust=-0.5),
        axis.title.y = element_text(size=12, vjust=0.4),
        axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=12),
        strip.text.x = element_text(size=12),
        legend.text = element_text(size=12)
        )

pdf("./plots/predicted_probabilities_speaking_safety.pdf", 12, 6)
print(p)
dev.off()

